{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "787ec3d3-eef1-4687-b164-881132cdefdb",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Testing and Evaluating Options Pricing Models"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0bfc0241-b312-45cd-b1b6-cfecab9ad85e",
   "metadata": {},
   "source": [
    "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0d0d0add-ff03-4c37-af3c-3942c8adc582",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Load the Libs we need"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4b0d19b3-7892-470c-8051-b3460605ed1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import Lib\n",
    "import pandas as pd\n",
    "import datetime as dt\n",
    "import pytz\n",
    "import os\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import yfinance as yf\n",
    "import scipy.stats as si\n",
    "import math\n",
    "import networkx as nx\n",
    "\n",
    "# import module\n",
    "from datetime import datetime, timezone\n",
    "from datetime import date, time\n",
    "from math import trunc\n",
    "from dateutil.parser import parse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36335255-3a12-4108-b282-d524a5e889cb",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "5649334c-a1a9-49f1-ab39-dff1bea2b512",
   "metadata": {},
   "source": [
    "## Introduction to Testing and Evaluating Trading Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "cd82d377-850f-44c6-810a-ca74c3283cab",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean Absolute Error: 0.40\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "\n",
    "# Mock data\n",
    "past_prices = np.array([100, 105, 103, 108, 107]).reshape(-1, 1)\n",
    "option_prices = np.array([5, 4.5, 5.2, 4.8, 4.7])\n",
    "\n",
    "# Split the data into training and testing subsets\n",
    "X_train, X_test, y_train, y_test = train_test_split(past_prices, option_prices, test_size=0.2, random_state=42)\n",
    "\n",
    "# Train a linear regression model\n",
    "model = LinearRegression().fit(X_train, y_train)\n",
    "\n",
    "# Evaluate the model's performance using Mean Absolute Error (MAE)\n",
    "predicted = model.predict(X_test)\n",
    "mae = mean_absolute_error(y_test, predicted)\n",
    "\n",
    "print(f\"Mean Absolute Error: {mae:.2f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "669a7722-961e-4095-bc90-2affc5c134e0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "cbcf98c6-6677-45df-b2e2-fb6b204224b1",
   "metadata": {},
   "source": [
    "## Performance Metrics for Trading Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "49e2d97f-ba68-4d2f-8584-75a2ca61b784",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sharpe Ratio: 0.27\n",
      "Sortino Ratio: 0.58\n",
      "Maximum Drawdown: -0.12\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Define the Sharpe Ratio function\n",
    "def sharpe_ratio(returns, risk_free_rate=0.01):\n",
    "    excess_returns = returns - risk_free_rate\n",
    "    return np.mean(excess_returns) / np.std(excess_returns)\n",
    "\n",
    "# Define the Sortino Ratio function\n",
    "def sortino_ratio(returns, risk_free_rate=0.01, target_return=0):\n",
    "    excess_returns = returns - risk_free_rate\n",
    "    downside_std = np.std(np.clip(excess_returns - target_return, None, 0))\n",
    "    return np.mean(excess_returns) / downside_std\n",
    "\n",
    "# Define the Maximum Drawdown function\n",
    "def max_drawdown(returns):\n",
    "    cumulative_returns = np.cumprod(1 + returns)\n",
    "    max_return = np.fmax.accumulate(cumulative_returns)\n",
    "    return np.min(cumulative_returns / max_return - 1)\n",
    "\n",
    "# Sample returns data\n",
    "returns = np.array([0.08, 0.12, -0.05, 0.10, -0.03, 0.15, 0.07, -0.06, 0.04, -0.10])\n",
    "\n",
    "# Print the metrics\n",
    "print(f\"Sharpe Ratio: {sharpe_ratio(returns):.2f}\")\n",
    "print(f\"Sortino Ratio: {sortino_ratio(returns):.2f}\")\n",
    "print(f\"Maximum Drawdown: {max_drawdown(returns):.2f}\")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e17d2bf2-aecd-4ac6-9123-1d91821c9bc8",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "38afb666-1735-40b6-ae63-82ec9e623057",
   "metadata": {},
   "source": [
    "## Backtesting Techniques"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "15ce8d38-ef69-4137-9332-98365f535405",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[4.86097561 4.94634146 4.81829268 4.73292683 4.77560976]\n"
     ]
    }
   ],
   "source": [
    "new_past_prices = np.array([106, 104, 107, 109, 108]).reshape(-1, 1)\n",
    "\n",
    "predicted_option_prices = model.predict(new_past_prices)\n",
    "print(predicted_option_prices)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "0a3331c3-95b1-488f-9460-d462e35720c8",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sell', 'buy', 'buy', 'sell', 'buy']\n"
     ]
    }
   ],
   "source": [
    "def rolling_window_backtest(data, window_size):\n",
    "    signals = []\n",
    "    for start in range(0, len(data) - window_size):\n",
    "        end = start + window_size\n",
    "        window_data = data[start:end]\n",
    "        moving_avg = np.mean(window_data)\n",
    "        if window_data[-1] > moving_avg:\n",
    "            signals.append('buy')\n",
    "        else:\n",
    "            signals.append('sell')\n",
    "    return signals\n",
    "\n",
    "data = np.array([100, 102, 99, 105, 104, 103, 108, 107])\n",
    "signals = rolling_window_backtest(data, window_size=3)\n",
    "print(signals)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "518866a3-5790-4e48-aadf-238b68d7d479",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "e6dd680c-4dac-4766-b0c9-11980d69153d",
   "metadata": {},
   "source": [
    "## Model Validation Approaches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "ae0fcb44-0610-43ad-868a-909f83dfb0a3",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model R^2 Score: 0.73\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import r2_score\n",
    "\n",
    "# Generate a larger mock dataset\n",
    "np.random.seed(42)  # for reproducibility\n",
    "\n",
    "# Creating a sequence of 500 numbers as past prices\n",
    "past_prices = np.linspace(50, 150, 500) \n",
    "\n",
    "# Generating option prices with a linear relationship + some random noise\n",
    "option_prices = 0.1 * past_prices + 5 + np.random.normal(0, 2, 500)\n",
    "\n",
    "# Split the data into training and testing sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(past_prices.reshape(-1, 1), option_prices, test_size=0.2, random_state=42)\n",
    "\n",
    "# Train a linear regression model\n",
    "model = LinearRegression().fit(X_train, y_train)\n",
    "\n",
    "# Predict using the test set\n",
    "predictions = model.predict(X_test)\n",
    "\n",
    "# Calculate R^2 score\n",
    "accuracy = r2_score(y_test, predictions)\n",
    "\n",
    "print(f\"Model R^2 Score: {accuracy:.2f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "a1286ea8-1c20-4fd6-9b97-050409894ae9",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cross-validation Scores: [0.07126362 0.08633118 0.01805444 0.0756227  0.00986164]\n",
      "Average Score: 0.05\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "\n",
    "# Reshape the data to ensure it's in the expected 2D format\n",
    "X = past_prices.reshape(-1, 1)\n",
    "y = option_prices\n",
    "\n",
    "# Perform cross-validation\n",
    "scores = cross_val_score(LinearRegression(), X, y, cv=5)\n",
    "print(f\"Cross-validation Scores: {scores}\")\n",
    "print(f\"Average Score: {np.mean(scores):.2f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "9104e199-1250-44e6-86e2-debe03d30707",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[16.02148275 16.12297715 16.32596595 16.22447155 16.52895475 16.63044916\n",
      " 16.83343796 16.73194356 16.93493236 17.03642676]\n"
     ]
    }
   ],
   "source": [
    "# Example: Out-of-sample testing with a new dataset.\n",
    "\n",
    "# Generate new data points that the model hasn't seen and ensure it's reshaped\n",
    "new_data = np.array([110, 111, 113, 112, 115, 116, 118, 117, 119, 120]).reshape(-1, 1)\n",
    "\n",
    "# Use the model (which was trained on past_prices) to predict option prices for the new data\n",
    "predicted_prices = model.predict(new_data)\n",
    "print(predicted_prices)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d0de498-9d02-4cf0-8204-1f838e6973ae",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "f61e8c21-2328-4c7c-8f62-c1de4f6467bb",
   "metadata": {},
   "source": [
    "## Addressing Overfitting and Improving Model Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "04944328-dfc2-4488-a6f6-1f0acd560e40",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Lasso Model Accuracy: 0.73\n"
     ]
    }
   ],
   "source": [
    "# Example: We'll use Lasso regression, a form of linear regression that includes a penalty term which can help in reducing overfitting.\n",
    "\n",
    "from sklearn.linear_model import Lasso\n",
    "\n",
    "lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)\n",
    "predictions = lasso_model.predict(X_test)\n",
    "\n",
    "accuracy = lasso_model.score(X_test, y_test)\n",
    "print(f\"Lasso Model Accuracy: {accuracy:.2f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "f1a45fad-463a-446c-87f9-13f2c875132a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ridge Model Accuracy: 0.73\n"
     ]
    }
   ],
   "source": [
    "# Example: Using Ridge regression to reduce overfitting.\n",
    "from sklearn.linear_model import Ridge\n",
    "\n",
    "ridge_model = Ridge(alpha=0.5).fit(X_train, y_train)\n",
    "ridge_predictions = ridge_model.predict(X_test)\n",
    "\n",
    "accuracy = ridge_model.score(X_test, y_test)\n",
    "print(f\"Ridge Model Accuracy: {accuracy:.2f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "5a607066-b66a-43b6-9b64-281c1d9e3cbf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest Model Accuracy: 0.65\n"
     ]
    }
   ],
   "source": [
    "# Example: Using Random Forests, which are less prone to overfitting compared to simple linear models.\n",
    "\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "\n",
    "rf_model = RandomForestRegressor(n_estimators=100).fit(X_train, y_train)\n",
    "rf_predictions = rf_model.predict(X_test)\n",
    "\n",
    "accuracy = rf_model.score(X_test, y_test)\n",
    "print(f\"Random Forest Model Accuracy: {accuracy:.2f}\")\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "156c7f93-6503-49d2-ba32-b2fc74580114",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
